---
title: "alive_thrive_aim2"
author: "Caroline Joyce"
date: "8/12/2020"
output:
  word_document: default
  html_document: default
---

```{r}
library(tidyverse)
setwd("~/Dropbox/A&T study of CoE/data/")
df <- read.csv("alive_thrive_clean.csv")

#analyses for aim 2
table(df$enroll_date)
table(df$dob)
table(df$enrolled)

#having r recognize dob and enroll_date as dates
df$dob <- as.Date(df$dob)
df$enroll_date <- as.Date(df$enroll_date)


#creating binary yes/no versions that exclude "don't know" for composite score
df$excl_bf <- ifelse(df$Q02a == 1, 1, 0)
df$skin <- ifelse(df$Q04 == 1, 1, 0)
df$bf_90 <- ifelse(df$Q06 == 1, 1, 0)
#reverse coding formula counseling score -- 1 if they were *not* given counseling to use formula
df$no_formula <- ifelse(df$Q07 == 0, 1, 0)
df$counseling <- ifelse(df$Q08 == 1, 1, 0)
#recoding formula counseling -- 1 if they were *not* counseled to use formula
df$formula_counseling <- ifelse(df$Q09 == 0, 1, 0)

#making new early initiation of breastfeeding variable
table(df$Q05a)
df$s2s[df$Q05a==1] <- 0
df$s2s[df$Q05a==2] <- 1
df$s2s[df$Q05a==9] <- 0

df$eibf <- ifelse(df$bf_90 == 1 & df$s2s == 1, 1, 0)

#calculating proportion of EIBF in each group
table(df$eibf)

#creating c-section variable
df$section <- ifelse(df$Q03 == 2, 1, 0)

table(df$section)


vag <- subset(df, Q03 == 1)
section <- subset(df, Q03 == 2)

table(section$eibf)

#creating EBF at interview date variable
df$exclusive <- ifelse(df$Q02a ==1, 1, 0)

#recoding Q02 as numeric
df$Q02 <- as.numeric(df$Q02)
#recoding hospital as numeric
df$Hospital <- as.numeric(df$Hospital)
df$Hospital <- as.factor(df$Hospital)

#composite score outcome
df$bf_score <- df$Q02 + df$excl_bf + df$skin + df$bf_90 + df$no_formula + df$counseling + df$formula_counseling

#creating dummies for month
df$month <- df$Bmonth
#recoding those who were born in 2020 for month 13, 14, 15
df$month <- ifelse(df$dob == "2020-01-15", 13, df$month)
df$month <- ifelse(df$dob == "2020-02-15", 14, df$month)
df$month <- ifelse(df$dob == "2020-03-15", 15, df$month)
str(df$month)
#df$month <- as.factor(df$month)
table(df$month)


#dummying quarter
df$Quarter <- as.factor(df$Quarter)

#creating lead and lag variables by hospital. Dates are counted as # of days since 1/1/1960, so if looking for date before we signify
#as "<", if looking for date after we signify as ">"
library(lubridate)
df$lead5 <- ifelse(df$dob <= (df$enroll_date %m-% months(5)), 1, 0)
df$lead4 <- ifelse(df$dob <= (df$enroll_date %m-% months(4))& (df$dob > df$enroll_date %m-% months(5)), 1, 0)
df$lead3 <- ifelse(df$dob <= (df$enroll_date %m-% months(3)) & (df$dob > df$enroll_date %m-% months(4)), 1, 0)
df$lead2 <- ifelse(df$dob <= (df$enroll_date %m-% months(2)) & (df$dob > df$enroll_date %m-% months(3)), 1, 0)
df$lead1 <- ifelse(df$dob <= (df$enroll_date %m-% months(1)) & (df$dob > df$enroll_date %m-% months(2)), 1, 0)
df$leadlag0 <- ifelse(df$dob >= (df$enroll_date %m-% days(30)) & (df$dob <= df$enroll_date %m+% days(30)), 1, 0)
df$lag1 <- ifelse(df$dob >= (df$enroll_date %m+% months(1)) & (df$dob < df$enroll_date %m+% months(2)), 1, 0)
df$lag2 <- ifelse(df$dob >= (df$enroll_date %m+% months(2)) & (df$dob < df$enroll_date %m+% months(3)), 1, 0)
df$lag3 <- ifelse(df$dob >= (df$enroll_date %m+% months(3)) & (df$dob < df$enroll_date %m+% months(4)), 1, 0)
df$lag4 <- ifelse(df$dob >= (df$enroll_date %m+% months(4)) & (df$dob < df$enroll_date %m+% months(5)), 1, 0)
df$lag5 <- ifelse(df$dob >= (df$enroll_date %m+% months(5)), 1, 0)
#df$lag4 <- ifelse(df$dob >= (df$enroll_date %m+% months(4)), 1, 0)


#checking to make sure have captured all participants with enrollment dates.
#there is one participant with an enrollment data but not a dob, so we have 10,472 entries with timing and 10,473 with enrollment dates.
sum(table(df$lead1+df$lead2+df$lead3+df$leadlag0+df$lag1+df$lag2+df$lag3))

#creating timing variable
df$timing[df$lead5 == 1] <- -5
df$timing[df$lead4 == 1] <- -4
df$timing[df$lead3 == 1] <- -3
df$timing[df$lead2 == 1] <- -2
df$timing[df$lead1 == 1] <- -1
df$timing[df$leadlag0 == 1] <- 0
df$timing[df$lag1 == 1] <- 1
df$timing[df$lag2 == 1] <- 2
df$timing[df$lag3 == 1] <- 3
df$timing[df$lag4 == 1] <- 4
df$timing[df$lag5 == 1] <- 5

#leaving timing as continuous for ITS models
#df$timing <- as.factor(df$timing)

df$event_time <- factor(df$timing, levels = c("0","-5", "-4", "-3", "-2", "-1", "1", "2", "3", "4", "5"))



#pre-enrollment rates of the main three outcomes
unenrolled <- subset(df, enrolled == 0)
mean(unenrolled$eibf, na.rm = TRUE)
mean(unenrolled$no_formula, na.rm = TRUE)
mean(unenrolled$counseling, na.rm = TRUE)
mean(unenrolled$exclusive, na.rm = TRUE)

unenrolled_section <- subset(section, enrolled == 0)
mean(unenrolled_section$eibf, na.rm = TRUE)
mean(unenrolled_section$no_formula, na.rm = TRUE)
mean(unenrolled_section$counseling, na.rm = TRUE)
mean(unenrolled_section$exclusive, na.rm = TRUE)

unenrolled_vaginal <- subset(vag, enrolled == 0)
mean(unenrolled_vaginal$eibf, na.rm = TRUE)
mean(unenrolled_vaginal$no_formula, na.rm = TRUE)
mean(unenrolled_vaginal$counseling, na.rm = TRUE)
mean(unenrolled_vaginal$exclusive, na.rm = TRUE)

private <- subset(df, private == 1)
public <- subset(df, private == 0)

unenrolled_private <- subset(private, enrolled == 0)
mean(unenrolled_private$eibf, na.rm = TRUE)
mean(unenrolled_private$no_formula, na.rm = TRUE)
mean(unenrolled_private$counseling, na.rm = TRUE)
mean(unenrolled_private$exclusive, na.rm = TRUE)

unenrolled_public <- subset(public, enrolled == 0)
mean(unenrolled_public$eibf, na.rm = TRUE)
mean(unenrolled_public$no_formula, na.rm = TRUE)
mean(unenrolled_public$counseling, na.rm = TRUE)
mean(unenrolled_public$exclusive, na.rm = TRUE)

#write.csv(df, "alive_thrive_clean_20201021.csv")

```



```{r}
#ITS model
library(lme4)
library(ggplot2)
library(multcomp)
library(stargazer)



df$time <- df$timing

#re-run using timing (-5 to 5) instead of continuous month

#early_centered = lmer(bf_90 ~ time + enrolled + time*enrolled + (1 | Hospital), data = df)
#summary(early_centered)
#confint(early_centered)

exclusive_centered <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = df)
summary(exclusive_centered)
confint(exclusive_centered)


counseling_centered <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = df)
summary(counseling_centered)
confint(counseling_centered)

#running with new eibf score
eibf = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = df)
summary(eibf)
confint(eibf)

#running new exclusively breastfeeding after discharge analysis
exclusive_int <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = df)
summary(exclusive_int)
confint(exclusive_int)

stargazer(eibf, exclusive_centered, counseling_centered, exclusive_int, type = "html", out = "ITS_mixed_models_centeredtime.htm")

#stratified analyses

#stratified by c-section vs. vaginal
table(df$Q03)

eibf_section = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = section)
summary(eibf_section)
confint(eibf_section)

exclusive_centered_section <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = section)
summary(exclusive_centered_section)
confint(exclusive_centered_section)


counseling_centered_section <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = section)
summary(counseling_centered_section)
confint(counseling_centered_section)

exclusive_int_section <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = 
                                section)
summary(exclusive_int_section)
confint(exclusive_int_section)

stargazer(eibf_section, exclusive_centered_section, counseling_centered_section, exclusive_int_section, type = "html", digits = 2, out = "ITS_centered_models_section.htm")

#stratified by vaginal births
eibf_vag = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = vag)
summary(eibf_vag)
confint(eibf_vag)

exclusive_centered_vag <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = vag)
summary(exclusive_centered_vag)
confint(exclusive_centered_vag)


counseling_centered_vag <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = vag)
summary(counseling_centered_vag)
confint(counseling_centered_vag)

exclusive_int_vag <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = 
                                vag)
summary(exclusive_int_vag)
confint(exclusive_int_vag)

stargazer(eibf_vag, exclusive_centered_vag, counseling_centered_vag, exclusive_int_vag, type = "html", digits = 2, out = "ITS_centered_models_vag.htm")



#stratified by private hospitals

#rerunning with centered timing
#early_centered_private = lmer(bf_90 ~ time + enrolled + time*enrolled + (1 | Hospital), data = private)
#summary(early_centered_private)
#confint(early_centered_private)

eibf_private = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = private)
summary(eibf_private)
confint(eibf_private)

exclusive_centered_private <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = private)
summary(exclusive_centered_private)
confint(exclusive_centered_private)


counseling_centered_private <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = private)
summary(counseling_centered_private)
confint(counseling_centered_private)

exclusive_int_private <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = 
                                private)
summary(exclusive_int_private)
confint(exclusive_int_private)

stargazer(eibf_private, exclusive_centered_private, counseling_centered_private, exclusive_int_private, type = "html", digits = 2, out = "ITS_centered_models_private.htm")

#centered public hospitals


#early_centered_public = lmer(bf_90 ~ time + enrolled + time*enrolled + (1 | Hospital), data = public)
#summary(early_centered_public)
#confint(early_centered_public)

eibf_public = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = public)
summary(eibf_public)
confint(eibf_public)

exclusive_centered_public <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = public)
summary(exclusive_centered_public)
confint(exclusive_centered_public)


counseling_centered_public <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = public)
summary(counseling_centered_public)
confint(counseling_centered_public)

exclusive_int_public <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = 
                                public)
summary(exclusive_int_public)
confint(exclusive_int_public)

stargazer(eibf_public, exclusive_centered_public, counseling_centered_public, exclusive_int_public, type = "html", digits = 2, out = "ITS_centered_models_public.htm")



###plots





ggplot(df1, aes(month, bf_90, group = month >= 7)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  stat_summary(fun.data = mean_se, geom = "errorbar", width = .2) +
#  geom_smooth(method = "lm", color = "black", linetype="dotted")+
#  stat_smooth(method="lm",fill=NA,colour="black",linetype=2,geom="ribbon") +
  geom_vline(xintercept = 6.8, linetype = "dotdash", size = 1) + 
#                color = "red", size=1) +
#  geom_vline(xintercept = 6, linetype="dotted", 
#                 color = "red", size=1.5) +
  labs(y = "proportion of births with early BF in hospital") +
  theme_minimal()

ggplot(df1, aes(time, bf_90, group = time >= 0)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  stat_summary(fun.data = mean_se, geom = "errorbar", width = .2) +
  geom_smooth(method = "lm", color = "black", linetype="dotted")+
#  stat_smooth(method="lm",fill=NA,colour="black",linetype=2,geom="ribbon") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
#                color = "red", size=1) +
#  geom_vline(xintercept = 6, linetype="dotted", 
#                 color = "red", size=1.5) +
  labs(y = "proportion of births with early BF in hospital") +
  theme_minimal()

#NEW PLOTS
df1 <- df %>% filter(!is.na(enrolled))
df1 <- df1 %>% filter(!is.na(bf_90))
df1 <- df1 %>% filter(!is.na(no_formula))
df1 <- df1 %>% filter(!is.na(counseling))
df1 <- df1 %>% filter(!is.na(bf_score))
df1 <- df1 %>% filter(!is.na(eibf))
df1 <- df1 %>% filter(!is.na(exclusive))

eibf_fig <- ggplot(df1, aes(timing, eibf)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  geom_smooth(method = "lm", color = "black", linetype="dotted")+
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EIBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  theme_minimal()


exclbf_fig <- ggplot(df1, aes(timing, no_formula)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  geom_smooth(method = "lm", color = "black", linetype="dotted")+
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  theme_minimal()

lactcouns_fig <- ggplot(df1, aes(timing, counseling)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  geom_smooth(method = "lm", color = "black", linetype="dotted")+
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "lactation counseling in hospital") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  labs(x = "month relative to enrollment") +
  theme_minimal()

exclus_survey_plot <- ggplot(df1, aes(timing, exclusive)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
#  geom_smooth(method = "lm", color = "black", linetype="dotted")+
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF at survey time") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  labs(x = "month relative to enrollment") +
  theme_minimal()


library("ggpubr")
theme_set(
  theme_bw() +
    theme(legend.position = "top")
  )
figure1 <- ggarrange(eibf_fig, exclbf_fig, lactcouns_fig, exclus_survey_plot,
                    labels = c("A", "B", "C", "D"),
                    ncol = 2, nrow = 2)

ggsave("fig1.pdf", plot = figure1)

#ggsave("eibf_plot.pdf", plot = eibf_fig)
#ggsave("exclusive_plot.pdf", plot = exclbf_fig)
#ggsave("lactcouns_plot.pdf", plot = lactcouns_fig)
#ggsave("exclusive_survey.pdf", plot = exclus_survey_plot)

##making graphs for vaginal vs. cesarean birth
df1$birth_modality <- ifelse(df1$Q03 == 1, "Vaginal", "Cesarean")

eibf_fig_birthmod <- ggplot(df1, aes(timing, eibf, group=birth_modality, color=birth_modality)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EIBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Birth Modality", start = 0.6, end = 0.2)+
  theme_minimal()

ebf_fig_birthmod <- ggplot(df1, aes(timing, no_formula, group=birth_modality, color=birth_modality)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Birth Modality", start = 0.6, end = 0.2)+
  theme_minimal()

lc_fig_birthmod <- ggplot(df1, aes(timing, counseling, group=birth_modality, color=birth_modality)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "Lactation counseling in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Birth Modality", start = 0.6, end = 0.2)+
  theme_minimal()

ebf_home_fig_birthmod <- ggplot(df1, aes(timing, exclusive, group=birth_modality, color=birth_modality)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF at Survey") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Birth Modality", start = 0.6, end = 0.2)+
  theme_minimal()

figure2 <- ggarrange(eibf_fig_birthmod, ebf_fig_birthmod, lc_fig_birthmod, ebf_home_fig_birthmod, 
                     labels = c("A", "B", "C", "D"), hjust = -5,
                     ncol = 2, nrow = 2, common.legend = TRUE)

ggsave("figure2.pdf", figure2)

figure2

##making graphs for private vs. public hospitals
df1$private_chr <- ifelse(df1$private == 1, "Private", "Public")

eibf_fig_hosp <- ggplot(df1, aes(timing, eibf, group=private_chr, color=private_chr)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EIBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Type of Hospital", start = 0.6, end = 0.2)+
  theme_minimal()

ebf_fig_hosp <- ggplot(df1, aes(timing, no_formula, group=private_chr, color=private_chr)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Type of Hospital", start = 0.6, end = 0.2)+
  theme_minimal()

lc_fig_hosp <- ggplot(df1, aes(timing, counseling, group=private_chr, color=private_chr)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "Lactation counseling in hospital") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Type of Hospital", start = 0.6, end = 0.2)+
  theme_minimal()

ebf_home_fig_hosp <- ggplot(df1, aes(timing, exclusive, group=private_chr, color=private_chr)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line") +
  geom_vline(xintercept = 0, linetype = "dotdash", size = 1) + 
  labs(y = "EBF at Survey") +
  labs(x = "month relative to enrollment") +
  scale_x_continuous(breaks = c(-5, -2, 0, 2, 5), labels = c("-5", "-2", "0", "2", "5"))+
  scale_color_grey("Type of Hospital", start = 0.6, end = 0.2)+
  theme_minimal()

figure3 <- ggarrange(eibf_fig_hosp, ebf_fig_hosp, lc_fig_hosp, ebf_home_fig_hosp, 
                     labels = c("A", "B", "C", "D"), hjust = -5,
                     ncol = 2, nrow = 2, common.legend = TRUE)

figure3


ggsave("figure3.pdf", figure3)



```
```{r}

#12/4/20 running check per reviewers comments where am comparing mother's only responses to full dataset

mother <- subset(df, Q00 == 1)
mother_exclusive_centered <- lmer(no_formula ~ time + enrolled + time*enrolled + (1 | Hospital), data = mother)
summary(mother_exclusive_centered)

mother_eibf = lmer(eibf ~ time + enrolled + time*enrolled + (1 | Hospital), data = mother)
summary(mother_eibf)

mother_exclusive_int <- lmer(exclusive ~ time + enrolled + time*enrolled + (1 | Hospital), data = mother)
summary(mother_exclusive_int)

mother_counseling_centered <- lmer(counseling ~ time + enrolled + time*enrolled + (1 | Hospital), data = mother)
summary(mother_counseling_centered)

summary(counseling_centered)
```


